us <- read.csv("https://raw.githubusercontent.com/JaclynCoate/6373_Time_Series/master/TermProject/Data/USdaily.7.26.20.csv", header = T, strip.white = T)
us <- transform(us, date = as.Date(as.character(date), "%Y%m%d"))
us <- subset(us, select = -c(states, dateChecked, hospitalized, lastModified, total, posNeg, totalTestResultsIncrease, hash))
us[is.na(us)] <- 0
us = us[order(as.Date(us$date, format = "%Y%m%d")),]
head(us)
## date positive negative pending hospitalizedCurrently
## 187 2020-01-22 2 0 0 0
## 186 2020-01-23 2 0 0 0
## 185 2020-01-24 2 0 0 0
## 184 2020-01-25 2 0 0 0
## 183 2020-01-26 2 0 0 0
## 182 2020-01-27 2 0 0 0
## hospitalizedCumulative inIcuCurrently inIcuCumulative
## 187 0 0 0
## 186 0 0 0
## 185 0 0 0
## 184 0 0 0
## 183 0 0 0
## 182 0 0 0
## onVentilatorCurrently onVentilatorCumulative recovered death
## 187 0 0 0 0
## 186 0 0 0 0
## 185 0 0 0 0
## 184 0 0 0 0
## 183 0 0 0 0
## 182 0 0 0 0
## totalTestResults deathIncrease hospitalizedIncrease negativeIncrease
## 187 2 0 0 0
## 186 2 0 0 0
## 185 2 0 0 0
## 184 2 0 0 0
## 183 2 0 0 0
## 182 2 0 0 0
## positiveIncrease
## 187 0
## 186 0
## 185 0
## 184 0
## 183 0
## 182 0
It is difficult to assume stationarity for this data due to multiple factors. We are working under the assumption that COVID is a novel virus and cases as well as hospitalizations will eventually return to zero. This being said our current modeling techniques do things such as return to the median or mimic the previously seen trends. Also, we see a heavy wandering trend in both new cases and hospitalization would be dependent on this as well as time. We will review the data and see what, if any, non-stationary components reveal themselves and model the data accordingly.
Traits:
ggplot(data = us, aes(x=date, y=hospitalizedCurrently))+
geom_line(color="orange")+
labs(title = "Current COVID Hospitalized Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
When reviewing the variables and the scatter plot from our previous EDA we can see that there are some correlations that were expected, for example currentHospitalizations is correlated with the variables that refelct ICU and Ventilaor patients. These metrics wouldn’t exist without hospitalizations. These variables also cannot help up predict hospitalizations because they after occurances. So we will actually be leveraing variables such as positive increase in order to see if there is some sort of correlation between hospitalized patients and the number of positive cases.
ggplot(data = us, aes(x=date, y=positiveIncrease))+
geom_line(color="orange")+
labs(title = "Increase in COVID Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
Since we are working with a seasonal and transformation component but it requires an additional transformation for the total positive COVID cases to become stationary for evaluation, we will only use positive increase of cases to predict currently hospitzliaed cases for the VAR model.
#Positive Cases Transformations
#pos.diff = artrans.wge(us$positive, phi.tr = 1, lag.max = 100)
#pos.diff.2 = artrans.wge(pos.diff, phi.tr = 1, lag.max = 100)
#pos.trans = artrans.wge(pos.diff.2,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Increase Positive Transformation
inpos.diff = artrans.wge(us$positiveIncrease, phi.tr = 1, lag.max = 100)
inpos.trans = artrans.wge(inpos.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#Current Hospitalization Transformations
us.diff = artrans.wge(us$hospitalizedCurrently, phi.tr = 1, lag.max = 100)
currhosp.trans = artrans.wge(us.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
#VARselect to choose lag
VARselect(cbind(currhosp.trans, inpos.trans), lag.max = 10, type= "both")
## $selection
## AIC(n) HQ(n) SC(n) FPE(n)
## 7 7 2 7
##
## $criteria
## 1 2 3 4 5
## AIC(n) 3.093857e+01 3.083329e+01 3.079091e+01 3.076976e+01 3.069554e+01
## HQ(n) 3.101650e+01 3.095018e+01 3.094676e+01 3.096458e+01 3.092932e+01
## SC(n) 3.113058e+01 3.112131e+01 3.117493e+01 3.124979e+01 3.127158e+01
## FPE(n) 2.731960e+13 2.459304e+13 2.357880e+13 2.309552e+13 2.145770e+13
## 6 7 8 9 10
## AIC(n) 3.066656e+01 3.051427e+01 3.053424e+01 3.059674e+01 3.059649e+01
## HQ(n) 3.093930e+01 3.082598e+01 3.088492e+01 3.098638e+01 3.102509e+01
## SC(n) 3.133860e+01 3.128233e+01 3.139830e+01 3.155681e+01 3.165257e+01
## FPE(n) 2.086403e+13 1.793906e+13 1.833019e+13 1.955149e+13 1.959499e+13
#fit model
usdiff.var.fit = VAR(cbind(currhosp.trans, inpos.trans), type = "both", p = 2)
#7 day predictions
pred.var7 = predict(usdiff.var.fit, n.ahead = 7)
pred.var7$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.12350 -2942.410 2858.163 2900.286
## [2,] -385.17371 -3346.493 2576.145 2961.319
## [3,] -80.65766 -3262.529 3101.214 3181.871
## [4,] -124.14161 -3327.324 3079.041 3203.182
## [5,] -46.62794 -3284.538 3191.282 3237.910
## [6,] -43.58380 -3288.207 3201.039 3244.623
## [7,] -11.23201 -3262.277 3239.813 3251.045
b. 90 Day
#90 day predictions
pred.var90 = predict(usdiff.var.fit, n.ahead = 90)
pred.var90$fcst$currhosp.trans
## fcst lower upper CI
## [1,] -42.123501 -2942.410 2858.163 2900.286
## [2,] -385.173707 -3346.493 2576.145 2961.319
## [3,] -80.657665 -3262.529 3101.214 3181.871
## [4,] -124.141611 -3327.324 3079.041 3203.182
## [5,] -46.627942 -3284.538 3191.282 3237.910
## [6,] -43.583795 -3288.207 3201.039 3244.623
## [7,] -11.232008 -3262.277 3239.813 3251.045
## [8,] -6.349200 -3259.226 3246.528 3252.877
## [9,] 7.305513 -3246.870 3261.481 3254.175
## [10,] 12.421029 -3242.210 3267.053 3254.632
## [11,] 18.773716 -3236.131 3273.679 3254.905
## [12,] 22.480950 -3232.533 3277.495 3255.014
## [13,] 26.203504 -3228.870 3281.277 3255.073
## [14,] 28.923414 -3226.175 3284.022 3255.099
## [15,] 31.503265 -3223.608 3286.615 3255.112
## [16,] 33.699320 -3221.418 3288.817 3255.117
## [17,] 35.776726 -3219.344 3290.897 3255.120
## [18,] 37.697587 -3217.424 3292.819 3255.122
## [19,] 39.549962 -3215.572 3294.672 3255.122
## [20,] 41.334045 -3213.789 3296.457 3255.123
## [21,] 43.082251 -3212.041 3298.205 3255.123
## [22,] 44.799892 -3210.323 3299.923 3255.123
## [23,] 46.499486 -3208.623 3301.622 3255.123
## [24,] 48.185069 -3206.938 3303.308 3255.123
## [25,] 49.861824 -3205.261 3304.985 3255.123
## [26,] 51.532055 -3203.591 3306.655 3255.123
## [27,] 53.198022 -3201.925 3308.321 3255.123
## [28,] 54.860928 -3200.262 3309.984 3255.123
## [29,] 56.521787 -3198.601 3311.645 3255.123
## [30,] 58.181203 -3196.942 3313.304 3255.123
## [31,] 59.839642 -3195.283 3314.963 3255.123
## [32,] 61.497398 -3193.626 3316.620 3255.123
## [33,] 63.154688 -3191.968 3318.278 3255.123
## [34,] 64.811654 -3190.311 3319.935 3255.123
## [35,] 66.468399 -3188.655 3321.591 3255.123
## [36,] 68.124990 -3186.998 3323.248 3255.123
## [37,] 69.781476 -3185.341 3324.904 3255.123
## [38,] 71.437889 -3183.685 3326.561 3255.123
## [39,] 73.094252 -3182.029 3328.217 3255.123
## [40,] 74.750580 -3180.372 3329.874 3255.123
## [41,] 76.406884 -3178.716 3331.530 3255.123
## [42,] 78.063172 -3177.060 3333.186 3255.123
## [43,] 79.719449 -3175.404 3334.842 3255.123
## [44,] 81.375717 -3173.747 3336.499 3255.123
## [45,] 83.031981 -3172.091 3338.155 3255.123
## [46,] 84.688240 -3170.435 3339.811 3255.123
## [47,] 86.344498 -3168.778 3341.467 3255.123
## [48,] 88.000753 -3167.122 3343.124 3255.123
## [49,] 89.657007 -3165.466 3344.780 3255.123
## [50,] 91.313260 -3163.810 3346.436 3255.123
## [51,] 92.969513 -3162.153 3348.092 3255.123
## [52,] 94.625766 -3160.497 3349.749 3255.123
## [53,] 96.282018 -3158.841 3351.405 3255.123
## [54,] 97.938270 -3157.185 3353.061 3255.123
## [55,] 99.594521 -3155.528 3354.717 3255.123
## [56,] 101.250773 -3153.872 3356.374 3255.123
## [57,] 102.907025 -3152.216 3358.030 3255.123
## [58,] 104.563276 -3150.560 3359.686 3255.123
## [59,] 106.219528 -3148.903 3361.342 3255.123
## [60,] 107.875779 -3147.247 3362.999 3255.123
## [61,] 109.532031 -3145.591 3364.655 3255.123
## [62,] 111.188282 -3143.935 3366.311 3255.123
## [63,] 112.844534 -3142.278 3367.967 3255.123
## [64,] 114.500785 -3140.622 3369.624 3255.123
## [65,] 116.157037 -3138.966 3371.280 3255.123
## [66,] 117.813288 -3137.310 3372.936 3255.123
## [67,] 119.469540 -3135.653 3374.592 3255.123
## [68,] 121.125791 -3133.997 3376.249 3255.123
## [69,] 122.782043 -3132.341 3377.905 3255.123
## [70,] 124.438294 -3130.685 3379.561 3255.123
## [71,] 126.094546 -3129.028 3381.218 3255.123
## [72,] 127.750797 -3127.372 3382.874 3255.123
## [73,] 129.407049 -3125.716 3384.530 3255.123
## [74,] 131.063300 -3124.060 3386.186 3255.123
## [75,] 132.719552 -3122.403 3387.843 3255.123
## [76,] 134.375803 -3120.747 3389.499 3255.123
## [77,] 136.032055 -3119.091 3391.155 3255.123
## [78,] 137.688306 -3117.435 3392.811 3255.123
## [79,] 139.344558 -3115.778 3394.468 3255.123
## [80,] 141.000809 -3114.122 3396.124 3255.123
## [81,] 142.657061 -3112.466 3397.780 3255.123
## [82,] 144.313312 -3110.810 3399.436 3255.123
## [83,] 145.969564 -3109.153 3401.093 3255.123
## [84,] 147.625815 -3107.497 3402.749 3255.123
## [85,] 149.282067 -3105.841 3404.405 3255.123
## [86,] 150.938318 -3104.185 3406.061 3255.123
## [87,] 152.594570 -3102.528 3407.718 3255.123
## [88,] 154.250821 -3100.872 3409.374 3255.123
## [89,] 155.907073 -3099.216 3411.030 3255.123
## [90,] 157.563324 -3097.560 3412.686 3255.123
startingPoints7 = us$hospitalizedCurrently[126:132]
currHospForecasts7 = pred.var7$fcst$currhosp.trans[,1:3] + startingPoints7
b. 90 Day
startingPoints90 = us$hospitalizedCurrently[43:132]
currHospForecasts90 = pred.var90$fcst$currhosp.trans[,1:3] + startingPoints90
#7 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,139), ylab = "Currently Hospitalized COVID Patients", main = "7 Day Currently Hospitalized Patients Forecast")
lines(seq(133,139,1), as.data.frame(currHospForecasts7)$fcst, type = "l", col = "red")
b. 90 Day
#90 day Forecasts
plot(seq(1,132,1), us$hospitalizedCurrently, type = "l", xlim = c(0,222), ylab = "Currently Hospitalized COVID Patients", main = "90 Day Currently Hospitalized Patients Forecast")
lines(seq(133,222,1), as.data.frame(currHospForecasts90)$fcst, type = "l", col = "red")
#varASE = mean((us$hospitalizedCurrently[123:132]-pred.var$fcst$y1[1:10])^2)
#varASE
Since we have been looking at additional regressors for all our above models and know that the best regressor to leverage will be positive increase of COVID cases. This regressor reflects similar behavior to that of current hospitlizations and can be model properly against hospitalizations. 1. Create Current Hospitalized ts variable
tUScurrhop = ts(us$hospitalizedCurrently)
tUSx = data.frame(positiveIncrease = ts(us$positiveIncrease))
fit.mlp.new = mlp(ts(tUSx), reps = 50, comb = "mean")
mlp.new.fore7 = forecast(fit.mlp.new, h = 7)
mlp.new.fore7
## Point Forecast
## 133 60186.21
## 134 63506.35
## 135 65111.61
## 136 66084.49
## 137 65414.51
## 138 65068.91
## 139 64748.12
b. 90 Day Forecast for Regressor
mlp.new.fore90 = forecast(fit.mlp.new, h = 90)
mlp.new.fore90
## Point Forecast
## 133 60186.21
## 134 63506.35
## 135 65111.61
## 136 66084.49
## 137 65414.51
## 138 65068.91
## 139 64748.12
## 140 64941.03
## 141 65054.26
## 142 65210.41
## 143 65215.74
## 144 65227.35
## 145 65134.13
## 146 65107.93
## 147 65103.52
## 148 65172.30
## 149 65220.57
## 150 65250.04
## 151 65202.03
## 152 65130.42
## 153 65101.87
## 154 65138.55
## 155 65197.53
## 156 65238.83
## 157 65235.85
## 158 65169.73
## 159 65119.73
## 160 65115.96
## 161 65166.04
## 162 65214.65
## 163 65239.64
## 164 65209.92
## 165 65143.25
## 166 65113.64
## 167 65135.06
## 168 65188.15
## 169 65227.84
## 170 65233.46
## 171 65178.77
## 172 65126.35
## 173 65115.60
## 174 65158.29
## 175 65207.00
## 176 65235.40
## 177 65214.79
## 178 65150.66
## 179 65116.58
## 180 65130.06
## 181 65180.96
## 182 65222.17
## 183 65233.21
## 184 65186.45
## 185 65131.39
## 186 65115.69
## 187 65152.21
## 188 65201.09
## 189 65232.02
## 190 65218.77
## 191 65157.55
## 192 65119.72
## 193 65126.63
## 194 65175.17
## 195 65217.64
## 196 65233.14
## 197 65193.27
## 198 65136.17
## 199 65116.13
## 200 65147.01
## 201 65196.05
## 202 65229.26
## 203 65222.28
## 204 65164.16
## 205 65122.70
## 206 65123.89
## 207 65169.95
## 208 65213.63
## 209 65232.96
## 210 65199.43
## 211 65140.86
## 212 65116.86
## 213 65142.31
## 214 65191.36
## 215 65226.65
## 216 65225.23
## 217 65170.58
## 218 65125.66
## 219 65121.70
## 220 65164.97
## 221 65209.79
## 222 65232.52
new.regressor7 <- data.frame(c(us$positiveIncrease, mlp.new.fore7$mean))
new.regressor7
## c.us.positiveIncrease..mlp.new.fore7.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60186.21
## 134 63506.35
## 135 65111.61
## 136 66084.49
## 137 65414.51
## 138 65068.91
## 139 64748.12
b. 90 day regressor var
new.regressor90 <- data.frame(c(us$positiveIncrease, mlp.new.fore90$mean))
new.regressor90
## c.us.positiveIncrease..mlp.new.fore90.mean.
## 1 3613.00
## 2 3171.00
## 3 4671.00
## 4 6255.00
## 5 6885.00
## 6 9259.00
## 7 11442.00
## 8 10632.00
## 9 12873.00
## 10 17656.00
## 11 19044.00
## 12 19724.00
## 13 19655.00
## 14 21897.00
## 15 24694.00
## 16 25773.00
## 17 27992.00
## 18 31945.00
## 19 33252.00
## 20 25550.00
## 21 28937.00
## 22 30737.00
## 23 30534.00
## 24 34419.00
## 25 34351.00
## 26 30561.00
## 27 27844.00
## 28 25133.00
## 29 25631.00
## 30 30298.00
## 31 30923.00
## 32 31964.00
## 33 27963.00
## 34 27468.00
## 35 25872.00
## 36 26333.00
## 37 28916.00
## 38 31784.00
## 39 34174.00
## 40 35901.00
## 41 27380.00
## 42 22605.00
## 43 25219.00
## 44 26641.00
## 45 29568.00
## 46 33056.00
## 47 29185.00
## 48 25767.00
## 49 22523.00
## 50 22449.00
## 51 25056.00
## 52 27490.00
## 53 27605.00
## 54 24810.00
## 55 21504.00
## 56 18236.00
## 57 22663.00
## 58 21193.00
## 59 26705.00
## 60 24710.00
## 61 24701.00
## 62 20171.00
## 63 20992.00
## 64 20853.00
## 65 21319.00
## 66 26513.00
## 67 24555.00
## 68 21590.00
## 69 20105.00
## 70 18798.00
## 71 16629.00
## 72 19385.00
## 73 22601.00
## 74 23522.00
## 75 23762.00
## 76 21639.00
## 77 20415.00
## 78 19982.00
## 79 20312.00
## 80 20839.00
## 81 23352.00
## 82 23106.00
## 83 18823.00
## 84 17012.00
## 85 17175.00
## 86 20803.00
## 87 22032.00
## 88 23477.00
## 89 25341.00
## 90 21373.00
## 91 18510.00
## 92 23435.00
## 93 23873.00
## 94 27527.00
## 95 31046.00
## 96 31994.00
## 97 27284.00
## 98 27017.00
## 99 33021.00
## 100 38684.00
## 101 39072.00
## 102 44421.00
## 103 43783.00
## 104 41857.00
## 105 39175.00
## 106 47462.00
## 107 50674.00
## 108 53826.00
## 109 54223.00
## 110 54734.00
## 111 45789.00
## 112 41600.00
## 113 51766.00
## 114 62147.00
## 115 58836.00
## 116 66645.00
## 117 63007.00
## 118 60978.00
## 119 58465.00
## 120 62879.00
## 121 65382.00
## 122 70953.00
## 123 77233.00
## 124 65180.00
## 125 64884.00
## 126 56971.00
## 127 63642.00
## 128 69150.00
## 129 71027.00
## 130 75193.00
## 131 65413.00
## 132 61713.00
## 133 60186.21
## 134 63506.35
## 135 65111.61
## 136 66084.49
## 137 65414.51
## 138 65068.91
## 139 64748.12
## 140 64941.03
## 141 65054.26
## 142 65210.41
## 143 65215.74
## 144 65227.35
## 145 65134.13
## 146 65107.93
## 147 65103.52
## 148 65172.30
## 149 65220.57
## 150 65250.04
## 151 65202.03
## 152 65130.42
## 153 65101.87
## 154 65138.55
## 155 65197.53
## 156 65238.83
## 157 65235.85
## 158 65169.73
## 159 65119.73
## 160 65115.96
## 161 65166.04
## 162 65214.65
## 163 65239.64
## 164 65209.92
## 165 65143.25
## 166 65113.64
## 167 65135.06
## 168 65188.15
## 169 65227.84
## 170 65233.46
## 171 65178.77
## 172 65126.35
## 173 65115.60
## 174 65158.29
## 175 65207.00
## 176 65235.40
## 177 65214.79
## 178 65150.66
## 179 65116.58
## 180 65130.06
## 181 65180.96
## 182 65222.17
## 183 65233.21
## 184 65186.45
## 185 65131.39
## 186 65115.69
## 187 65152.21
## 188 65201.09
## 189 65232.02
## 190 65218.77
## 191 65157.55
## 192 65119.72
## 193 65126.63
## 194 65175.17
## 195 65217.64
## 196 65233.14
## 197 65193.27
## 198 65136.17
## 199 65116.13
## 200 65147.01
## 201 65196.05
## 202 65229.26
## 203 65222.28
## 204 65164.16
## 205 65122.70
## 206 65123.89
## 207 65169.95
## 208 65213.63
## 209 65232.96
## 210 65199.43
## 211 65140.86
## 212 65116.86
## 213 65142.31
## 214 65191.36
## 215 65226.65
## 216 65225.23
## 217 65170.58
## 218 65125.66
## 219 65121.70
## 220 65164.97
## 221 65209.79
## 222 65232.52
mlp.fit1 = mlp(tUScurrhop, xreg = tUSx, outplot = T, comb = "mean")
plot(mlp.fit1)
currhosp.fore7 = forecast(mlp.fit1, h = 7, xreg = new.regressor7)
plot(currhosp.fore7)
b. Currently Hospitalized 90 Day Forecast w/ Positive Increaser Regressor
currhosp.fore90 = forecast(mlp.fit1, h = 90, xreg = new.regressor90)
plot(currhosp.fore90)
#if (!require(devtools)) {
# install.packages("devtools")
#}
#devtools::install_github("josephsdavid/tswgewrapped", build_vignettes = TRUE)
library(tswgewrapped)
data_train.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[1:122], positiveIncrease = us$positiveIncrease[1:122])
data_test.m <- data.frame(hospitalizedCurrently = us$hospitalizedCurrently[123:132], positiveIncrease = us$positiveIncrease[123:132])
# search for best NN hyperparameters in given grid
model.m = tswgewrapped::ModelBuildNNforCaret$new(data = data_train.m, var_interest = "hospitalizedCurrently",
search = 'random', tuneLength = 5, parallel = TRUE,
batch_size = 50, h = 7, m = 7,
verbose = 1)
## Registered S3 method overwritten by 'lava':
## method from
## print.pcor greybox
## Aggregating results
## Selecting tuning parameters
## Fitting reps = 10, hd = 2, allow.det.season = TRUE on full training set
## - Total Time for training: : 49.928 sec elapsed
res.m <- model.m$summarize_hyperparam_results()
res.m
## reps hd allow.det.season RMSE ASE RMSESD ASESD
## 1 10 2 TRUE 3103.429 13970241 2184.689 16296534
## 2 11 3 FALSE 3129.341 14942705 2380.109 19673017
## 3 16 1 FALSE 3559.638 16480775 2047.127 15988977
## 4 16 3 TRUE 3204.752 15391189 2373.358 19509361
## 5 22 3 FALSE 3268.191 16196849 2463.200 19979406
model.m$plot_hyperparam_results()
best.m <- model.m$summarize_best_hyperparams()
best.m
## reps hd allow.det.season
## 1 10 2 TRUE
final.ase.m <- dplyr::filter(res.m, reps == best.m$reps &
hd == best.m$hd &
allow.det.season == best.m$allow.det.season)[['ASE']]
final.ase.m
## [1] 13970241
# Final Model
caret_model.m = model.m$get_final_models(subset = 'a')
caret_model.m$finalModel
## MLP fit with 2 hidden nodes and 10 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,3)
## Forecast combined using the median operator.
## MSE: 1065689.5459.
# Plot Final Model
plot(caret_model.m$finalModel)
We have leveraged the tswgewrapper code above to produce a hyperparameter tuned NN model for our ensemble model. This model will work as our higher functioning ensemble model. Below are the steps to surface the best hyperparameters based on our grid search
#Ensemble model
ensemble.mlp = mlp(tUScurrhop, xreg = tUSx, outplot = T, reps = 10, hd = 2, allow.det.season = T)
ensemble.mlp
## MLP fit with 2 hidden nodes and 10 repetitions.
## Series modelled in differences: D1.
## Univariate lags: (1,2,3)
## 1 regressor included.
## - Regressor 1 lags: (1,3)
## Forecast combined using the median operator.
## MSE: 1025155.9706.
#Plot ensemble model
plot(ensemble.mlp)
fore7.m = forecast(ensemble.mlp, xreg = new.regressor7, h=7)
plot(fore7.m)
fore90.m = forecast(ensemble.mlp, xreg = new.regressor90, h=90)
plot(fore90.m)